home *** CD-ROM | disk | FTP | other *** search
- /************************************************************************
- * *
- * Program: mail2HTML.c *
- * *
- *----------------------------------------------------------------------*
- * Description: Convert Mail/News Files to HTML (Prototype) *
- * *
- *----------------------------------------------------------------------*
- * Copyright (C) 1993 Basis Systeme netzwerk (BSn) *
- * Franz-Wolter Strasse 42 *
- * D-8000 Munich 81 *
- * Federal Republic of Germany *
- * *
- * Redistribution and use in source and binary forms are permitted *
- * provided that the above copyright notice and this paragraph are *
- * duplicated in all such forms and that any documentation, *
- * advertising materials, and other materials related to such *
- * distribution and use acknowledge that the software was developed *
- * by Basis Systeme netzwerk/Munich. *
- * *
- * This is distributed in the hope that it will be useful, but *
- * WITHOUT ANY WARRANTY; without even the implied warranty of *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
- * *
- ************************************************************************/
-
- #ifndef lint
- static char RCS_id[] = "$Header: /usr/export/home/edz/WWW/experimental/RCS/mail2html.c,v 0.02 1993/01/21 20:36:49 edz Exp edz $";
-
- #endif
- /*************
- * $Log: mail2html.c,v $
- * Revision 0.3 1993/01/21 20:36:49 edz
- * Prepare for distribution
- *
- * Revision 0.2 1993/01/21 08:10:49 edz
- * Removed Redundant address from Table of Contents structure
- * Changed the name of a few functions
- *
- * Revision 0.1 1993/01/10 20:25:49 edz
- * Initial revision
- *
- *************/
- #define _MAIN_C
- /************************************************************************
- * WARNING: *
- * DON'T complain that this looks like a "one-off" hack! Why? *
- * 'cause it is. *
- * I don't seem to have any mail around here that breaks this *
- * program and it seems to work (and I hope follows RFC822) but *
- * I am sure that SOMEONE must have some mail that this very simple *
- * parser breaks (Greetings from Murphy). *
- * *
- * NOTES: *
- * You need an ANSI-C compiler and libraries to compile this. *
- * *
- * (Q) Why did I not use Perl? *
- * (A) I have not figured out why EVERYONE else is using Perl *
- * for these kinds of programs. *
- * (Q) Why did I write this? *
- * (A) Mail like Usenet News is a very common information source *
- * that is already Hypertext (The file system, man pages and *
- * your Window systems are also Hypertext). *
- * Before you even have a chance to ask. Hypertext is a MODEL *
- * of Representation not an implementation. The World Wide Web, *
- * SOFABED, HyTime, Xerox Notes, HyperCard, HyTelnet ... are *
- * all implementations of a model for hypertext. The programs *
- * www, viola, hytelnet, notes, ... are all user interfaces to *
- * the respective implementations. *
- * *
- * The often cited reasons for the success of the Gopher model *
- * in contrast to the W3 model: that W3 requires hypertext *
- * documents and that hypertext documents are rare is faulty. *
- * A html editor would be welcome but is not as important as *
- * the community belief has stipulated. *
- * W3 represents a paradigm change. *
- * *
- * With Man pages, Mail, News, native HTML, GNU Info, *
- * Hytelnet/MaxThink, Internet Resource files (FAQs), *
- * WAIS, Gopher (the current gateway is incomplete), *
- * Archie, Directory Assistance (DIXIE/X500), CSO phone *
- * books *
- * ... there is more than enough! *
- * *
- * With some simple and transparent auto-taggers the whole *
- * Internet, Usenet and ... is the web. We hope to have a man *
- * page autotagger done in the near future (the current crop *
- * --- at least the ones I know--- are NOT autotaggers). *
- * *
- * This program is only a technology test. We hope to have *
- * a mail transport done in the very near future. *
- * We are currently experimenting with several user authentication *
- * schemes (a'la POP). *
- * *
- * If you use this TEST-code we would like to hear from you. *
- * *
- * TODO: *
- * (i) Add Support for MIME (RFC1341) *
- * (ii) Add Support for Configuration (RFC1343) *
- * (ii) Add Support for Multinational headers (RFC1342) *
- * (iii) Add X.400 Support *
- * (iv) Use a more intelligent message ID algorithm. The current *
- * implementation requires that the folder that contains the *
- * reference was at one time accessed, viz. that a delivery *
- * or cron process ran a bogus parse. Given the other info *
- * available (eg. time) one could narrow the search. *
- * (v) Use ndbm/gdbm instead of stupid ASCII file list/Remove *
- * ID Duplication. *
- * (vi) Fold into WWW Daemon (see Gopher), eg. *
- * WWW/Mail/mbox returns the table of Contents *
- * references have names like "/MessageID/AA04187@BSNGATE" *
- * (vii) Fold in state system transport (currently it's stateless) *
- ************************************************************************/
- /*-
- * /MessageID/<MAIL-ID> returns the .html file corresponding to <MAIL-ID>
- *
- */
- #include <stdlib.h>
- #include <stdio.h>
- #include <string.h>
- #include <sys/stat.h>
- #include "Entities.h"
- #include "Ctypes.h"
- #include "Ctypes.c"
-
- #ifndef MAXPATHLEN
- # ifdef _MAX_PATH
- # define MAXPATHLEN _MAX_PATH
- # else
- # define MAXPATHLEN 1024
- # endif
- #endif
-
- /*----------------------- User Configuration ----------------------------- */
- /* The file below must be readable and writeable */
- #define MESSAGE_INDEX_FILE "/var/adm/MESSAGES.INDEX"
- #define CONTENTS_EXTENSION ".MAIL_BODIES"
- #define DIRECTORY_EXTENSION ".TABLE_OF_CONTENTS"
-
- /*---------------------- End User Configuration -------------------------- */
-
- #ifndef TRUE
- # define TRUE 1
- # define FALSE 0
- #endif
-
/*
 * Exit codes.
 *
 * Each E_* macro expands to TWO comma-separated arguments -- the process
 * exit status followed by a human-readable message -- so that it can be
 * handed directly to Fatal(), e.g. Fatal(E_USAGE).
 *
 * E_IOERR used to lack the comma between its two parts, which made any
 * use of it a syntax error.
 */
#define E_ENOENT 02, "Sorry Document is not available or access is restricted"
#define E_USAGE 64, "Incorrect Document request"
#define E_NOINPUT 66, "Cannot open input"
#define E_IOERR 74, "input/output error"
#define E_SOFTWARE 70, "internal software error"
#define E_NOTEMP 75, "INTERNAL ERROR: Can't create a file!"
-
- static const char IndexFile[] = MESSAGE_INDEX_FILE;
- static const char body_ext[] = CONTENTS_EXTENSION;
- static const char HEX[] = "0123456789ABCDEF";
-
- #ifdef _MSC_VER /* The Microsoft compiler (Xenix/OS2/NT) */
- # define strncasecomp strnicmp
- #else
-
/*
 * Case-insensitive counterpart of strncmp().
 *
 * Compares at most n characters of str1 and str2, ignoring letter case.
 * Returns 0 when the compared prefixes are equal (always 0 when n == 0),
 * otherwise a non-zero value whose sign follows the first difference.
 *
 * Characters are converted to unsigned char before being handed to
 * tolower(): the <ctype.h> functions are undefined for negative values,
 * which plain char can hold when 8-bit text is compared.
 */
static int
strncasecomp(const char *str1, const char *str2, size_t n)
{
    size_t i;

    for (i = 0; i < n; i++) {
        const unsigned char a = (unsigned char) str1[i];
        const unsigned char b = (unsigned char) str2[i];
        int diff;

        /* One string ended first: they differ unless both ended here */
        if (a == '\0' || b == '\0')
            return (int) a - (int) b;
        if ((diff = tolower(a) - tolower(b)) != 0)
            return diff;
    }
    return 0;
}
-
- #endif
-
- #ifdef NEED_STRDUP
/*
 * Fallback strdup() for C libraries that do not provide one (only
 * compiled when NEED_STRDUP is defined).
 *
 * Returns a malloc()ed copy of str, or NULL when memory is exhausted.
 * The caller owns the copy and releases it with free().
 */
static char *
strdup(const char *str)
{
    char *copy = (char *) malloc(strlen(str) + 1);  /* +1 for the NUL */

    if (copy == NULL)
        return NULL;
    strcpy(copy, str);
    return copy;
}
- #endif
-
-
/*
 * One entry of the table of contents.
 *
 * Entries form a singly linked list; AddMessage() pushes each new entry
 * onto the front, so the list ends up in reverse message order.
 */
struct _contents {
    char *anchor;            /* Encoded message ID used in HREF="/MessageID/<anchor>" */
    char *subject;           /* Text shown for the entry (the message subject) */
    char *author;            /* Full name of the author */
    int isNews;              /* Non-zero ==> news article, zero ==> mail */
    char *group;             /* Newsgroup name (NULL for mail) */
    long start;              /* Offset at which the converted message starts */
    struct _contents *next;  /* Next entry in the list, NULL at the end */
};

typedef struct _contents contents_t;
-
- /* Add an anchor to the table of contents */
- void
- AddMessage(contents_t ** Contents, long start, int isNews, const char *anchor,
- const char *subject, const char *author, const char *group)
- {
- contents_t *tp;
-
- /* Build a table of contents in reverse order */
- if ((tp = (contents_t *) malloc(sizeof(contents_t))) != NULL) {
- tp -> start = start;
- tp -> isNews = isNews;
- tp -> anchor = strdup(anchor);
- tp -> subject = strdup(subject);
- tp -> author = strdup(author);
- tp -> group = (isNews ? strdup(group) : NULL);
- tp -> next = *Contents;
- *Contents = tp;
- }
- }
-
/*
 * One-character hash of a message ID.
 *
 * The result is written as the first character of each line of the
 * MESSAGES.INDEX file so that a lookup can skip non-matching lines by
 * comparing a single character. The value is (sum % 225) + 30 converted
 * to char, i.e. 30..254 before the conversion.
 */
char
HASH(const unsigned char *name)
{
    unsigned short sum = 0;
    const unsigned char *cp;
    int pos = 0;

    for (cp = name; *cp != '\0'; cp++, pos++) {
        /* Shift amount cycles through 1..8 with the character position */
        const int shifted = ((short) TOISO(*cp)) << (1 + (pos % 8));

        sum += shifted + *cp;
    }
    return (char) ((sum % 225) + 30);
}
-
- /* Encode reference as HTML compliant */
- static char *
- EncodeAnchor(char *buf, const unsigned char *anchor, int case_sensitive)
- {
- char *tp1 = buf;
- const char *tp2;
- unsigned char ch;
-
- /* Note RFC822 specifies 7-bit headers (Message IDs are 6 bit) */
- /* Replace non acceptable chars (# and %) and make uppercase */
- for (tp2 = anchor; (ch = *tp2) != '\0'; tp2++)
- if (!ISPATH(ch) || ch == '#' || ch == '%') {
- *tp1++ = '%';
- *tp1++ = HEX[(TOISO(ch) & '\377') >> 4];
- *tp1++ = HEX[(TOISO(ch) & '\377') % 16];
- } else
- *tp1++ = (case_sensitive ? (char)ch : (char)TOUPPER(ch));
- *tp1 = '\0';
- return (buf);
- }
-
- /* Append dictionary to parsed message id list */
- void
- DumpDictionary(contents_t * Contents, const char *filename, long end)
- {
- if (Contents) {
- contents_t *tp;
- FILE *fp;
- char path[MAXPATHLEN + 256];
-
- if ((fp = fopen(IndexFile, "a")) == NULL)
- return;
-
- EncodeAnchor(path, filename, TRUE);
- for (tp = Contents; tp != NULL; tp = tp -> next) {
- fprintf(fp, "%c%s\t%s%s\t%ld-%ld\tFrom %s: %s\n",
- HASH(tp -> anchor),
- tp -> anchor,
- path, body_ext,
- tp -> start, end,
- tp -> author, tp -> subject);
- end = tp -> start;
- }
- }
- }
-
-
- /* Print the Table of Contents */
- void
- PrintContents(contents_t * Contents, FILE * fp)
- {
- if (Contents != NULL) {
- contents_t *tp;
-
- fprintf(fp, "<!-- Table of Contents for this file (reverse order) -->\n");
- fprintf(fp, "<H1>Table of Contents</H1>\n<DL>\n");
- for (tp = Contents; tp != NULL; tp = tp -> next) {
- fprintf(fp, "<DT><A HREF=\"/MessageID/%s\">", tp -> anchor);
- if (tp -> isNews)
- fprintf(fp, "%s in %s", tp -> author, tp -> group);
- else
- fprintf(fp, "%s from %s", "Mail", tp -> author);
- fprintf(fp, "</A><DD>%s\n", tp -> subject);
- }
- fprintf(fp, "</DL><P>\n\n");
- }
- }
-
/*
 * Strip trailing white space (CR, LF, blank, tab) from line, in place.
 *
 * Returns line. A NULL argument is returned unchanged, and an empty or
 * all-white-space string is handled without looking at (or writing to)
 * memory in front of the buffer, which the previous pointer-walking
 * version did.
 */
char *
StripTail(char *line)
{
    size_t len;

    if (line == NULL)
        return NULL;

    len = strlen(line);
    while (len > 0) {
        const char ch = line[len - 1];

        if (ch != '\r' && ch != '\n' && ch != ' ' && ch != '\t')
            break;
        line[--len] = '\0';
    }
    return line;
}
-
/*
 * Trim a string on both sides.
 *
 * Trailing white space is removed in place by StripTail(); the returned
 * pointer addresses the first character of line that is not ISWHITE().
 */
static char *
HTStrip(char *line)
{
    char *first = StripTail(line);

    while (ISWHITE(*first))
        first++;
    return first;
}
-
-
/*
 * Rewind infp and copy everything in it to outfp.
 *
 * Does nothing when either stream is NULL.
 *
 * rewind() alone is enough to switch a "w+" stream from writing to
 * reading (it is a file-positioning call). The former fflush(infp) was
 * dropped because flushing a stream that is open for input only has
 * undefined behaviour in ISO C.
 */
static void
CatStream(FILE * infp, FILE * outfp)
{
    int ch;

    if (infp == NULL || outfp == NULL)
        return;

    rewind(infp);
    while ((ch = getc(infp)) != EOF)
        putc(ch, outfp);
}
-
/* Value of the hexadecimal digit c (either case), or -1 if c is not one */
static int
HexDigitValue(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    return -1;
}

/*
 * Decode an HTML reference: every "%XX" escape becomes the byte it names.
 *
 *   buf     receives the decoded, NUL-terminated string
 *   anchor  NUL-terminated encoded string
 *
 * Returns buf. The output is never longer than the input and is written
 * strictly behind the read position, so buf may be the same buffer as
 * anchor (or start earlier in the same buffer).
 *
 * A '%' that is not followed by two hex digits is copied literally, so
 * the scan can no longer run past the terminating NUL. "%00" is copied
 * literally as well, so a decoded string cannot be cut short by an
 * embedded NUL.
 *
 * Fixes: the write pointer was not advanced after a decoded byte (each
 * decoded character was overwritten by the next one), and lower-case hex
 * digits were not recognised.
 */
static char *
DecodeAnchor(char *buf, const unsigned char *anchor)
{
    char *out = buf;
    const unsigned char *in = anchor;

    while (*in != '\0') {
        int hi, lo;

        /* in[2] is only read once in[1] is known to be a (non-NUL) hex digit */
        if (*in == '%' &&
            (hi = HexDigitValue(in[1])) >= 0 &&
            (lo = HexDigitValue(in[2])) >= 0 &&
            (hi | lo) != 0) {
            *out++ = (char) ((hi << 4) | lo);
            in += 3;
        } else
            *out++ = (char) *in++;
    }
    *out = '\0';
    return buf;
}
-
-
- /* In "XXX <YYY> ZZZZ" return "YYY" */
- static char *
- MessageKey(char *buf, char *line)
- {
- char *tp1;
- char *tp2;
-
- if ((tp1 = strchr(line, '<')) != NULL) {
- if ((tp2 = strchr(++tp1, '>')) != NULL)
- *tp2 = '\0';
- } else
- tp1 = line;
- /* Message Keys are CASE INSENSITIVE */
- return EncodeAnchor(buf, HTStrip(tp1), FALSE);
- }
-
/*-
 * Pick one half of a mail address.
 *
 *   author != 0: return the author's name,
 *                in "XXX (YYY)" or "YYY <XXX>" return "YYY"
 *   author == 0: return the author's address,
 *                in "XXX (YYY)" or "YYY <XXX>" return "XXX"
 *
 * The result is trimmed of surrounding white space and copied to buf,
 * which is returned. At most 255 characters of key are looked at, so buf
 * must have room for 256 bytes.
 *
 * Fixes: key was copied into the 256-byte work buffer without a length
 * check, and overlapping regions were moved with strcpy() (undefined
 * behaviour); memmove() is used instead. The closing delimiter is now
 * searched for after the opening one.
 */
static char *
NameKey(char *buf, const char *key, int author)
{
    char email[256];
    char *s, *e;
    char p1, p2, b1, b2;
    size_t len;

    if (author) {
        p1 = '('; p2 = ')';     /* wanted part is inside (...) */
        b1 = '<'; b2 = '>';     /* otherwise drop <...> */
    } else {
        p1 = '<'; p2 = '>';     /* wanted part is inside <...> */
        b1 = '('; b2 = ')';     /* otherwise drop (...) */
    }

    /* Bounded copy of the input into the work buffer */
    len = strlen(key);
    if (len >= sizeof(email))
        len = sizeof(email) - 1;
    memcpy(email, key, len);
    email[len] = '\0';

    if ((s = strchr(email, p1)) != NULL && (e = strchr(s + 1, p2)) != NULL) {
        *e = '\0';              /* Chop off everything after p2 (')' or '>') */
        memmove(email, s + 1, strlen(s + 1) + 1);
    } else if ((s = strchr(email, b1)) != NULL && (e = strchr(s + 1, b2)) != NULL) {
        memmove(s, e + 1, strlen(e + 1) + 1);   /* Remove <...> or (...) */
    }

    strcpy(buf, HTStrip(email));    /* Remove leading and trailing spaces */
    return buf;
}
-
-
-
- /*
- * This Function returns a static storage area, it is the duty of the caller
- * to save it.
- */
- static char *
- Anchor(char *line)
- {
- char *tp1;
- char *tp2 = line;
- static char tmp[BUFSIZ];
-
- tmp[0] = '\0';
- while (tp2 != NULL && (tp1 = strchr(tp2, '<')) != NULL) {
- if (tp1 > tp2) {
- *tp1 = '\0';
- strcat(tmp, tp2);
- }
- if ((tp2 = strchr(++tp1, '>')) != NULL)
- *tp2++ = '\0';
- tp1 = HTStrip(tp1);
- strcat(tmp, "<A HREF=\"/MessageID/");
- EncodeAnchor(tmp + strlen(tmp), tp1, FALSE);
- sprintf(tmp + strlen(tmp), "\">%s</A>", tp1);
- } /* while */
- if (tp2 != NULL)
- strcat(tmp, tp2);
- return tmp;
- }
-
- /*-
- * Send out a line of the message body.
- * (1) Use Latin-1 public entities
- * (2) Translate URLs, eg. ftp://site:port/path to:
- * <A HREF="ftp/site:port/path">ftp/site:port/path</A>
- * (3) Markup VT100 Style underlined text (eg. man) as
- * Strong.
- * We don't confirm that the protocol is valid (registered)
- * --- would be very simple but the list is growing too fast
- * (file, ftp, http, wais, gopher, prospero, ... )
- */
- static char *
- BodyLine(char *outbuf, unsigned char *line, int nl)
- {
- unsigned char *tcp = outbuf;
- unsigned char *tp = line;
- unsigned char ch;
-
- while (( ch = *tp) != '\0')
- if (ch == '_' && *(tp+1) == '\b') {
- char buf[256];
- char tmp[256];
- char *ptr = buf;
-
- /* VT100 Underlined text */
- do {
- tp += 2; /* Skip _<Ctrl-H> */
- *ptr++ = *tp++;
- } while (*tp == '_' && *(tp+1) == '\b');
- *ptr = '\0';
-
- ptr = BodyLine(tmp, buf, FALSE); /* Fixup chars */
- /* Underlined Text is marked strong */
- strcpy(tcp, "<strong>");
- strcat(tcp, ptr);
- strcat(tcp, "</strong>");
- tcp += strlen(tcp);
-
- } else if ((ch == ':') && (*(tp+1) == '/') && (*(tp+2) == '/') &&
- (tp >= &line[2]) && ISALPHA(*(tp-1))) {
- unsigned char *tp2;
-
- /* Saw a URL Magic Back up */
- do {
- --tp; --tcp;
- } while (ISALPHA(*tp) && tp >= line);
- if (!ISALPHA(*tp)) {
- tp++; tcp++;
- }
-
- /* Insert Anchor */
- for (tp2 = tp; ISPATH(*tp2); tp2++)
- /* loop */ ;
- if (( *(tcp-1) == '"' || *(tcp-1) == '\'') && *tp2 == *(tp-1)) {
- /* quoted arguments */
- *tp2++ = '\0'; /* ASCIIZ */
- sprintf(--tcp, "<A HREF=\"%s\">%s%c</A>", tp, tp - 1, *(tp-1));
- } else {
- char ch;
-
- ch = *tp2; /* Save character */
- *tp2 = '\0'; /* ASCIIZ */
- sprintf(tcp, "<A HREF=\"%s\">%s</A>", tp, tp);
- *tp2 = ch; /* Replace character */
- }
- tp = tp2; /* Set pointer to tail */
- tcp += strlen(tcp); /* Go to tail */
- } else if (Markups[ch].len) {
- memcpy(tcp, Markups[ch].entity, Markups[ch].len);
- tcp += Markups[ch].len;
- tp++;
- } else if (!ISASCII(ch) || (ISCNTRL(ch) && !ISWHITE(ch))) {
- *tcp++ = '&';
- *tcp++ = '#';
- *tcp++ = (unsigned char) ((TOISO(ch) / 100) + '0');
- *tcp++ = (unsigned char) ((TOISO(ch) % 100) / 10 + '0');
- *tcp++ = (unsigned char) ((TOISO(ch) % 10) + '0');
- *tcp++ = ';';
- tp++;
- } else
- *tcp++ = *tp++;
- if (nl) *tcp++ = '\n';
- *tcp = '\0';
- return outbuf;
- }
-
- /* Put a line of the Body */
- static void
- PutBodyLine(unsigned char *line, FILE * outfp)
- {
- unsigned char outbuf[BUFSIZ];
-
- fputs(BodyLine(outbuf, line, TRUE), outfp);
- }
-
- /* Headers should be 7 bit */
- static char *
- HeaderLine(char *outbuf, unsigned char *line)
- {
- /* For now pretend we have 8 bit headers (latter RFC1342) */
- return BodyLine(outbuf, line, FALSE);
- }
-
/*-
 * IsMailFromLine - is this a legal unix mailbox "From " separator line?
 *
 * The line is expected to look like
 *
 *      From <sender> <day> <mon> <dd> <hh>:<mm>:<ss> [<tz> [<dst>]] <year>
 *
 * where <sender> contains no white space and the date is in ctime(3C)
 * format. The check is strict: besides the layout, the day and month
 * names and the range of every number are verified (dd 1-31, hh 0-23,
 * mm 0-59, ss 0-60, year 1969-2199).
 *
 * Returns 0 when the line matches, a negative value otherwise:
 *      -100            does not start with "From "
 *      -200 - n        only n (< 8) fields were found
 *      -1 / -2         unknown day / month name
 *      -20 - i         numeric field is not a number   (i = 0, 2, .. 8)
 *      -10 - i         numeric field is out of range   (i = 0, 2, .. 8)
 *
 * From: Bernd Wechner (bernd@bhpcpd.kembla.oz.au)
 * Obfuscated by: KFS (as usual)
 */

static int
IsMailFromLine(char *line)
{
#define MAX_FIELDS 10
    static const char day_names[] = "SunMonTueWedThuFriSat";
    static const char month_names[] = "JanFebMarAprMayJunJulAugSepOctNovDec";
    static const int limits[] = {1, 31, 0, 23, 0, 59, 0, 60, 1969, 2199};
    char *field[MAX_FIELDS];
    char *cursor;
    int count;
    int k;

    if (strncmp(line, "From ", 5) != 0)
        return -100;

    /* Split into: sender day mon dd hh mm ss year [...] */
    cursor = line + 5;
    count = 0;
    while (count < MAX_FIELDS) {
        while (*cursor && *cursor != '\n' && ISASCII(*cursor) && ISWHITE(*cursor))
            cursor++;
        if (*cursor == '\0' || *cursor == '\n')
            break;
        field[count] = cursor;
        while (*cursor && ISASCII(*cursor) && !ISWHITE(*cursor)) {
            const int colon = (*cursor++ == ':');

            /* hh and mm end at the ':' rather than at white space */
            if (colon && (count == 4 || count == 5))
                break;
        }
        count++;
    }

    if (count < 8)
        return -200 - count;

    /* Skip over a time zone (and DST marker) in front of the year */
    if (count > 8 && !ISNUM(field[7][0]))
        field[7] = field[8];            /* ... TZ year */
    if (count > 9 && !ISNUM(field[7][0]))
        field[7] = field[9];            /* ... TZ DST year */

    for (k = 0; k < 21; k += 3)
        if (strncmp(field[1], &day_names[k], 3) == 0)
            break;
    if (k == 21)
        return -1;

    for (k = 0; k < 36; k += 3)
        if (strncmp(field[2], &month_names[k], 3) == 0)
            break;
    if (k == 36)
        return -2;

    /* dd, hh, mm, ss and year are fields 3 .. 7 */
    for (k = 0; k < 5; k++) {
        const char *number = field[3 + k];
        int value;

        if (!ISNUM(*number))
            return -20 - 2 * k;
        value = atoi(number);
        if (value < limits[2 * k] || limits[2 * k + 1] < value)
            return -10 - 2 * k;
    }
    return 0;
}
-
/*-
 * Start of News:
 *      "Article <Number> of <Newsgroup>:"
 *
 *   line     line to examine
 *   article  if not NULL, receives <Number> on success
 *   group    if not NULL, receives a pointer to the HTML-converted group
 *            name (trailing ':' removed); it points at static storage
 *            that the next successful call overwrites
 *
 * Returns 0 when the line matches, otherwise a negative code telling
 * which part failed (-500 prefix, -400 number, -300/-100 white space,
 * -200 "of", -10 missing group).
 *
 * Fixes: the terminator was written to grp[sizeof(grp)], one past the
 * end of the array; the converted name went through a 126-byte buffer
 * without a length check; and strlen(grp) - 1 wrapped around for an
 * empty name.
 * NOTE(review): the factor 16 assumes no entity in Markups[] is longer
 * than 16 bytes -- confirm against Entities.h.
 */
static int
IsNewsLine(char *line, int *article, char **group)
{
    static char grp[60];
    unsigned char raw[sizeof(grp)];
    char cooked[sizeof(grp) * 16 + 2];
    size_t len;
    int number;

    if (strncmp(line, "Article ", 8))
        return -500;
    line += 8;
    /* Skip white space */
    while (ISWHITE(*line))
        line++;

    if (!ISNUM(*line))
        return -400;
    number = atoi(line);
    /* skip number data */
    while (ISNUM(*line))
        line++;

    if (!ISWHITE(*line))
        return -300;
    /* Skip white space */
    while (ISWHITE(*line))
        line++;

    if (line[0] != 'o' || line[1] != 'f')
        return -200;
    /* Skip the of */
    line += 2;

    if (!ISWHITE(*line))
        return -100;
    /* Skip white space */
    while (ISWHITE(*line))
        line++;

    if (*line == '\0')
        return -10;             /* Missing Group */

    /* OK, it was "Article NNN of XXX.XXX.XXXXX:" */

    /* Limit the raw name first so that its conversion fits in cooked[] */
    len = strlen(line);
    if (len >= sizeof(raw))
        len = sizeof(raw) - 1;
    memcpy(raw, line, len);
    raw[len] = '\0';
    HeaderLine(cooked, raw);

    len = strlen(cooked);
    if (len >= sizeof(grp))
        len = sizeof(grp) - 1;
    memcpy(grp, cooked, len);
    grp[len] = '\0';

    /* Strip trailing ':' if it has one */
    if (len > 0 && grp[len - 1] == ':')
        grp[len - 1] = '\0';

    if (group)
        *group = grp;
    if (article)
        *article = number;
    return 0;
}
-
/*
 * Header information (the "envelope") collected from one message by
 * ParseRFC822Header(). Every member is a NUL-terminated string; an
 * empty string means that the header was not present.
 */
typedef struct {
    char cc[128];               /* cc: */
    char bcc[128];              /* bcc: */
    char from[64];              /* Name taken from Reply-To: or From: */
    char address[64];           /* Address taken from Reply-To: or From: */
    char subject[128];          /* Subject: */
    char date[40];              /* Date: */
    char id[80];                /* Message-ID: (encoded key) */
    char keywords[256];         /* Keywords: */
    char organization[80];      /* Organization: */
    char followup[126];         /* Followup-To: */
    char newsgroups[BUFSIZ];    /* Newsgroups: */
    char xrefs[BUFSIZ / 2];     /* In-Reply-To: */
    char refs[BUFSIZ];          /* References: */
} envelope_t;
-
/*
 * Remove trailing CR, LF, blanks and tabs from the first len characters
 * of text, NUL-terminate it, and return the new length.
 */
static size_t
TrimHeaderTail(char *text, size_t len)
{
    while (len > 0 && (text[len - 1] == '\r' || text[len - 1] == '\n' ||
                       text[len - 1] == ' ' || text[len - 1] == '\t'))
        len--;
    text[len] = '\0';
    return len;
}

/*
 * Read one logical header line from infp into buf (len bytes).
 *
 * Continuation lines (lines starting with a blank or tab) are unfolded:
 * each is appended to the line, separated by a single blank. Trailing
 * white space is removed. Text that does not fit in buf is discarded.
 *
 * Returns buf, or NULL at end of file. The empty string is returned for
 * the blank line that ends the header; in that case nothing beyond that
 * line is read, so a message body that starts with white space is not
 * mistaken for a continuation.
 *
 * Trimming is done locally because the current length is needed anyway
 * to keep every append inside buf.
 *
 * Fixes: a NULL pointer was passed on to the trimming code at end of
 * file, and continuation text was appended without regard to len.
 */
static char *
ReadHeaderLine(char *buf, size_t len, FILE *infp)
{
    size_t used;
    int ahead;                  /* lookahead token */

    if (buf == NULL || len == 0 || infp == NULL)
        return NULL;
    if (fgets(buf, (int) len, infp) == NULL)
        return NULL;

    used = TrimHeaderTail(buf, strlen(buf));
    if (used == 0)
        return buf;             /* end of header: do not look ahead */

    /* Check if continuation line */
    while ((ahead = fgetc(infp)) == '\t' || ahead == ' ') {
        int ch;

        if (used + 1 < len)
            buf[used++] = ' ';
        while ((ch = fgetc(infp)) != EOF && ch != '\n')
            if (used + 1 < len)
                buf[used++] = (char) ch;
        used = TrimHeaderTail(buf, used);
    } /* while */

    if (ahead != EOF)
        ungetc(ahead, infp);    /* push back */
    return buf;
}
-
- static int
- ParseRFC822Header(envelope_t * envelope, FILE * infp)
- {
- char tmp[BUFSIZ];
- char *tcp;
- int mime = 0;
-
- memset(envelope, 0, sizeof(envelope_t));
-
- /* Read the header bits */
- /* Everything after first null line is message body */
- while ((tcp = ReadHeaderLine(tmp, sizeof(tmp), infp)) != NULL && *tcp) {
- /* TAGS in RFC-822 Header */
- switch (*tcp++) {
- case 'b': case 'B': /* possible bcc: */
- if (strncasecomp("cc: ", tcp, 4) == 0)
- HeaderLine(envelope -> bcc, tcp + 4);
- break;
- case 'c': case 'C': /* possible cc: or Content-<*>: */
- /* MIME NOT YET SUPPORTED */
- if (strncasecomp("ontent-", tcp, 7) == 0)
- mime++;
- else if (strncasecomp("c: ", tcp, 3) == 0)
- HeaderLine(envelope -> cc, tcp + 3);
- break;
- case 'd': case 'D': /* possible Date: */
- if (strncasecomp("ate: ", tcp, 5) == 0)
- HeaderLine(envelope -> date, tcp + 5);
- break;
- case 'f': case 'F': /* possible From: or Followup-To: */
- /* "Reply-to" SUPERSEDES the "From" field */
- if (*(envelope -> from) == '\0' && strncasecomp("rom: ", tcp, 5) == 0) {
- NameKey(envelope -> address, tcp + 5, FALSE);
- NameKey(envelope -> from, tcp + 5, TRUE);
- } else if (strncasecomp("ollowup-To: ", tcp, 12) == 0)
- strcpy(envelope -> followup, tcp + 12);
- break;
- case 'i': case 'I': /* possible In-Reply-To: */
- if (strncasecomp("n-Reply-To: ", tcp, 12) == 0)
- strcpy(envelope -> xrefs, tcp + 12);
- break;
- case 'k': case 'K': /* possible Keywords: */
- if (strncasecomp("eywords: ", tcp, 9) == 0)
- HeaderLine(envelope -> keywords, tcp + 9);
- break;
- case 'm': case 'M': /* possible Message-ID: or MIME-Version: */
- if (strncasecomp("essage-ID: ", tcp, 11) == 0)
- MessageKey(envelope -> id, tcp + 11);
- else if (strncasecomp("IME-Version: ", tcp, 13) == 0)
- mime++;
- break;
- case 'n': case 'N': /* possible Newsgroups: */
- if (strncasecomp("ewsgroups: ", tcp, 11) == 0)
- HeaderLine(envelope -> newsgroups, tcp + 11);
- break;
- case 'o': case 'O': /* possible Organization: */
- if (strncasecomp("rganization: ", tcp, 13) == 0)
- HeaderLine(envelope -> organization, tcp + 13);
- break;
- case 'r': case 'R': /* possible Reply-To: or References: */
- if (strncasecomp("eply-To: ", tcp, 9) == 0) {
- NameKey(envelope -> address, tcp + 9, FALSE);
- NameKey(envelope -> from, tcp + 9, TRUE);
- } else if (strncasecomp("eferences: ", tcp, 11) == 0)
- strcpy(envelope -> refs, tcp + 11);
- break;
- case 's': case 'S': /* possible Subject: or Sender: */
- if (strncasecomp("ubject: ", tcp, 8) == 0)
- HeaderLine(envelope -> subject, tcp + 8);
- break;
- } /* switch */
- }
-
- if (*(envelope -> id) == '\0') {
- static int count = 1;
-
- /* generate psuedo ID */
- sprintf(envelope -> id, "FAKE%05o", count++);
- }
- if (*(envelope -> subject) == '\0')
- strcpy(envelope -> subject, "No Subject");
- return mime;
- }
-
- static void
- LocateAnchors(contents_t ** Contents, int isNews, char *group, FILE * infp, FILE * outfp)
- {
- int mime;
- envelope_t envelope;
- static const char item[] = "<DT>%s:<DD>%s\n";
-
- #define xItem(s, n, v) if (s) fprintf(outfp, item, n, v);
- #define Item(n, v) xItem(*(v), (n), (v))
-
- mime = ParseRFC822Header(&envelope, infp);
-
- /* Add Message To Table of Contents */
- AddMessage(Contents
- ,ftell(outfp)
- ,isNews
- ,envelope.id
- ,envelope.subject
- ,envelope.from[0] ? envelope.from : "Annonymous"
- ,group);
-
- /* Print Header */
- fprintf(outfp, "<!-- Header -->\n<DL>\n");;
- Item("Subject", envelope.subject);
- Item("From", envelope.from);
- if (envelope.address[0])
- fprintf(outfp, "<DT>Reply to:<DD><ADDRESS>%s</ADDRESS>\n", envelope.address);
- Item("Organization", envelope.organization);
- Item("Date", envelope.date);
- xItem(envelope.xrefs[0], "In-Reply-To", Anchor(envelope.xrefs));
- xItem(envelope.refs[0], "References", Anchor(envelope.refs));
- Item("Followup-To", envelope.followup);
- Item("cc", envelope.cc);
- Item("bcc", envelope.bcc);
- Item("Keywords", envelope.keywords);
- if (isNews)
- fprintf(outfp, "<DT>Usenet %s:<DD>Article %d\n", group, isNews);
- xItem((envelope.newsgroups[0]) && (!isNews || strcmp(group, envelope.newsgroups)),
- "Cross Posted Newsgroups", envelope.newsgroups);
- fprintf(outfp, "</DL>\n");
- /* If we saw a MIME Header send out a warning message */
- if (mime)
- fprintf(outfp, "\
- <P><STRONG>WARNING: The message contained a MIME header (NOT YET Supported)</STRONG><P>\n");
- #undef Item
- #undef xItem
- }
-
- int
- ParseMail(contents_t ** Contents, FILE * infp, FILE * outfp)
- {
- char tmp[BUFSIZ];
- int count = 0;
- int lines = 0;
- int isNews;
- char *group;
-
- while (fgets(tmp, sizeof(tmp), infp) != NULL) {
- StripTail(tmp);
- if ((isNews = IsMailFromLine(tmp)) == 0 || IsNewsLine(tmp, &isNews, &group) == 0) {
- lines = 0;
- /* Mail header */
- if (count++)
- fprintf(outfp, "</PRE>\n");
- LocateAnchors(Contents, isNews, group, infp, outfp);
- } else {
- if (lines++ == 0)
- fprintf(outfp, "<PRE>\n\n");
- PutBodyLine(tmp, outfp); /* Body */
- }
- } /* while */
- if (lines) fprintf(outfp, "</PRE>");
- return count;
- }
-
/*
 * Report an unrecoverable error and terminate.
 *
 * The message is written to stdout, not stderr, as a small HTML document
 * so that it is what the client gets to see. The process then exits with
 * status code. The E_* macros supply both arguments at once.
 */
static void
Fatal(int code, const char *message)
{
    printf("<TITLE>Mail Server Error Message</TITLE>\n"
           "<PLAINTEXT>\n\n%s\n\n",
           message);
    exit(code);
}
-
/*
 * Return the last component of a path: the text after the final '/', or
 * the whole string when it contains no '/'. The result points into
 * string; nothing is copied or modified.
 */
static const char *
basename(char *string)
{
    const char *slash = strrchr(string, '/');

    if (slash == NULL)
        return string;
    return slash + 1;
}
-
/*
 * Send bytes [start, end) of filename to stdout, wrapped in a minimal
 * HTML document whose <TITLE> is title.
 *
 * Returns 0 on success and -1 when the range is invalid, the file cannot
 * be opened or the seek fails; nothing is written in those cases.
 *
 * Fixes: the file was never closed (on any path), and a range with
 * end < start made the copy loop run to the end of the file.
 */
static int
SendDocument(const char *title, const char *filename, long start, long end)
{
    long remaining = end - start;
    int ch;
    FILE *fp;

    if (filename == NULL || start < 0 || remaining < 0)
        return -1;

    if ((fp = fopen(filename, "r")) == NULL)
        return -1;

    if (fseek(fp, start, SEEK_SET) != 0) {
        fclose(fp);
        return -1;              /* seek error */
    }

    /* Produce HTML Document */
    fputs("<HTML>\n<HEAD>\n<TITLE>", stdout);
    fputs(title != NULL ? title : "", stdout);
    fputs("</TITLE>\n</HEAD>\n<BODY>\n", stdout);
    fputs("<!-- Message Body Follows -->\n", stdout);
    while (remaining-- > 0 && (ch = getc(fp)) != EOF)
        putc(ch, stdout);
    fputs("</BODY></HTML>\n", stdout);

    fclose(fp);
    return 0;
}
-
- int
- FetchMessage(const char *name)
- {
- FILE *fp;
- char tmp[BUFSIZ];
- const char hash = HASH(name);
- int result = -1;
-
- if ((fp = fopen(IndexFile, "r")) != NULL)
- while (fgets(tmp, sizeof(tmp), fp) != NULL)
- if (tmp[0] == hash) {
- char *anchor;
- char *filename;
- char *title;
- char *range;
-
- anchor = strtok(tmp+1, "\t");
- filename = strtok(NULL, "\t");
- range = strtok(NULL, "\t");
- title = strtok(NULL, "\n");
- if (anchor && filename && range && strcmp(anchor, name) == 0) {
- long start, end;
-
- if (sscanf(range, "%ld-%ld", &start, &end) != 2) continue;
- result = SendDocument(
- title ? title : anchor,
- DecodeAnchor(tmp, filename),
- start, end
- );
- break;
- }
- }
- fclose(fp);
- return result;
- }
-
-
- static FILE *
- OpenContentsFile(const char *name, const char *mode)
- {
- char tmp[MAXPATHLEN];
-
- strcpy(tmp, name);
- strcat(tmp, body_ext);
- return fopen(tmp, mode);
- }
-
- int
- main(int argc, char **argv)
- {
- FILE *outfp;
- FILE *fp;
- contents_t *Contents = NULL;
- char filename[MAXPATHLEN];
-
- InitCharTable();
- if (argc == 2) {
- DecodeAnchor(filename, argv[1]);
- if (strncmp(filename, "/MessageID/", 11) == 0) {
- if (FetchMessage(filename + 11))
- Fatal(E_ENOENT);
- return 0;
- } else {
- char tmp[MAXPATHLEN];
- struct stat statbuf1;
- struct stat statbuf2;
-
- /* Check if $1.html exists */
- strcpy(tmp, filename);
- strcat(tmp, DIRECTORY_EXTENSION);
- if (stat(filename, &statbuf1) == 0 && stat(tmp, &statbuf2) == 0 &&
- statbuf2.st_mtime > statbuf1.st_mtime && statbuf2.st_size > 200) {
- /* The cache exists and its newer */
- if ((outfp = fopen(tmp, "r")) != NULL) {
- CatStream(outfp, stdout); /* Cat the cache */
- fclose(outfp);
- return 0; /* DONE */
- } else
- outfp = stdout; /* Don't bother with caching */
- } else if ((outfp = fopen(tmp, "w+")) == NULL)
- outfp = stdout;
- }
- } else
- Fatal(E_USAGE);
-
- /* Open Input and parse to build Messages file */
- if ((fp = OpenContentsFile(filename, "w")) != NULL) {
- FILE *infp;
-
- if ((infp = fopen(filename, "r")) == NULL)
- Fatal(E_ENOENT);
- ParseMail(&Contents, infp, fp);
- fclose(infp);
- fflush(fp);
- DumpDictionary(Contents, filename, ftell(fp));
- fclose(fp);
- } else
- Fatal(E_NOTEMP);
-
- /* Produce HTML Entry Document */
- fputs("<!-- This Document has been Machine Converted -->\n", outfp);
- fprintf(outfp, "<HTML>\n<HEAD>\n<TITLE>%s</TITLE>\n</HEAD>\n<BODY>\n", basename(filename));
- if (Contents)
- PrintContents(Contents, outfp);
- else /* Was an error, so just cat the contents to recover */
- CatStream(OpenContentsFile(filename, "r"), outfp);
- fputs("</BODY></HTML>\n", outfp);
- if (fileno(outfp) != fileno(stdout))
- CatStream(outfp, stdout); /* Cat File to stdout */
- return 0;
- }
-